# Check the file path
here("nhanes3.rda")
## [1] "/Volumes/GoogleDrive/My Drive/Teaching/EPID674/EPID674_Week7_Class/nhanes3.rda"
# Load the saved R data
load(here("nhanes3.rda"))
# Remake a few variables from last class if they are no longer in your environment
sex1 <- factor(nhanes$sex, levels = c(1, 2), labels = c("male", "female"))
AGE5b <- cut(nhanes$age, quantile(nhanes$age, c(0, .2, .4, .6, .8, 1)), include.lowest = T) # quintiles
AGE5c <- cut(nhanes$age, breaks = c(19, 40, 50, 60, 70, 90))
age5c <- unclass(AGE5c)
nhanes <- cbind(nhanes, sex1, AGE5b, AGE5c, age5c)
# Clean up the dataset. Make sure NaN are NA for key covariates
table(nhanes$educ, useNA = "always")
##
## 1 2 3 NaN <NA>
## 2032 2349 660 33 0
nhanes$educ[is.nan(nhanes$educ)] <- NA
table(nhanes$educ, useNA = "always")
##
## 1 2 3 <NA>
## 2032 2349 660 33
table(nhanes$alc, useNA = "always")
##
## 0 1 NaN <NA>
## 2753 2189 132 0
nhanes$alc[is.nan(nhanes$alc)] <- NA
table(nhanes$alc, useNA = "always")
##
## 0 1 <NA>
## 2753 2189 132
tab1(nhanes$d_total) # Variable for death due to any cause in NHANES
## nhanes$d_total :
## Frequency %(NA+) %(NA-)
## 0 3794 74.8 74.8
## 1 1275 25.1 25.2
## NaN 5 0.1 0.0
## Total 5074 100.0 100.0
summ(nhanes$pmon_mec) # Number of months of follow up
## obs. mean median s.d. min. max.
## 5069 158.7 170 49.481 0 217
### Define Surv() object to use in later functions
surv.total <- Surv(nhanes$pmon_mec, nhanes$d_total)
surv.total[1:10] # Includes information on time of follow up and whether the person died, denoted with a "+"
## [1] 203+ 196+ 215+ 132 184+ 143 202+ 206+ 187+ 115
### K-M Life table and curve
fit.total <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ 1) # Model with no predictors, just outcome
#summary(fit.total)
summary(nhanes$pmon_mec/12) # How many years of follow up do you have?
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 12.42 14.17 13.22 16.08 18.08 5
fit.total # How many people died over this time period?
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ 1)
##
## 5 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## 5069 1275 NA NA NA
plot(fit.total)
## suppress 95% CI lines and the time marks for censored subjects.
plot(fit.total, ylim = c(0.7, 1.0), conf.int = F, mark.time = F, ylab="Probability of survival", xlab="Months")
### Survival by different levels of covariates
fit.total.sex <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
fit.total.sex # How many people died in each sex group?
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
##
## 5 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## nhanes$sex=1 2332 679 NA NA NA
## nhanes$sex=2 2737 596 NA NA NA
#summary(fit.total.sex)[1:10]
plot(fit.total.sex, ylim = c(0.6, 1.0), col = c("blue", "red"), lty = c(1, 2), mark.time = F, main = "Kaplan-Meier curve", xlab = "Time (months)", ylab = "Survival probability")
legend("bottomleft", legend = c("Men", "Women"), col = c("blue", "red"), lty = c(1, 2))
### Test for differences in survival curves
survdiff(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex) # Is there a difference in survival among males and females?
## Call:
## survdiff(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$sex)
##
## n=5069, 5 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## nhanes$sex=1 2332 679 573 19.7 35.8
## nhanes$sex=2 2737 596 702 16.1 35.8
##
## Chisq= 35.8 on 1 degrees of freedom, p= 2e-09
cox.bpb <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$bpb)
summary(cox.bpb) # Is blood Pb associated with survival? Easier to visualize in categories of exposure
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ nhanes$bpb)
##
## n= 5069, number of events= 1275
## (5 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## nhanes$bpb 0.070662 1.073219 0.005597 12.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## nhanes$bpb 1.073 0.9318 1.062 1.085
##
## Concordance= 0.62 (se = 0.008 )
## Likelihood ratio test= 113.3 on 1 df, p=<2e-16
## Wald test = 159.4 on 1 df, p=<2e-16
## Score (logrank) test = 153.9 on 1 df, p=<2e-16
bpb3 <- cut2(nhanes$bpb, g = 3)
tab1(bpb3) # Tertiles of blood Pb
## bpb3 :
## Frequency Percent Cum. percent
## [0.7, 2.4) 1760 34.7 34.7
## [2.4, 4.4) 1672 33.0 67.6
## [4.4,48.1] 1642 32.4 100.0
## Total 5074 100.0 100.0
# K-M Life table and curve
fit.total.bpb3 <- survfit(Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
fit.total.bpb3
## Call: survfit(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
##
## 5 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## factor(bpb3)=[0.7, 2.4) 1758 248 NA NA NA
## factor(bpb3)=[2.4, 4.4) 1670 451 NA NA NA
## factor(bpb3)=[4.4,48.1] 1641 576 NA NA NA
plot(fit.total.bpb3, col = c(1:3), lty = c(1:3), ylim = c(0.6, 1.0), main = "Survival in relation to blood lead levels", xlab = "Time (months)", ylab = "Survival probability")
legend(30, 0.7, legend = c("Q1", "Q2", "Q3"), lty = c(1:3), col = c(1:3))
# crude
cox.bpb3 <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
summary(cox.bpb3)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ factor(bpb3))
##
## n= 5069, number of events= 1275
## (5 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## factor(bpb3)[2.4, 4.4) 0.70388 2.02158 0.07906 8.903 <2e-16 ***
## factor(bpb3)[4.4,48.1] 1.00452 2.73061 0.07599 13.219 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## factor(bpb3)[2.4, 4.4) 2.022 0.4947 1.731 2.360
## factor(bpb3)[4.4,48.1] 2.731 0.3662 2.353 3.169
##
## Concordance= 0.605 (se = 0.007 )
## Likelihood ratio test= 196.5 on 2 df, p=<2e-16
## Wald test = 174.7 on 2 df, p=<2e-16
## Score (logrank) test = 186.5 on 2 df, p=<2e-16
# adjusted
cox.bpb3.adj <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + nhanes$age + factor(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + factor(nhanes$smk) + factor(nhanes$alc))
summary(cox.bpb3.adj)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 +
## nhanes$age + factor(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) +
## factor(nhanes$smk) + factor(nhanes$alc))
##
## n= 4905, number of events= 1211
## (169 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## bpb3[2.4, 4.4) 0.069004 1.071440 0.081665 0.845 0.39813
## bpb3[4.4,48.1] 0.084549 1.088226 0.083037 1.018 0.30858
## nhanes$age 0.088495 1.092529 0.002373 37.287 < 2e-16 ***
## factor(nhanes$sex)2 -0.294962 0.744560 0.065259 -4.520 6.19e-06 ***
## factor(nhanes$race)2 0.178596 1.195537 0.070922 2.518 0.01180 *
## factor(nhanes$educ)2 0.001651 1.001652 0.062588 0.026 0.97895
## factor(nhanes$educ)3 -0.205472 0.814263 0.102174 -2.011 0.04432 *
## factor(nhanes$smk)2 0.171628 1.187237 0.070372 2.439 0.01473 *
## factor(nhanes$smk)3 0.631970 1.881314 0.082193 7.689 1.48e-14 ***
## factor(nhanes$alc)1 -0.182403 0.833265 0.067767 -2.692 0.00711 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## bpb3[2.4, 4.4) 1.0714 0.9333 0.9130 1.2574
## bpb3[4.4,48.1] 1.0882 0.9189 0.9248 1.2806
## nhanes$age 1.0925 0.9153 1.0875 1.0976
## factor(nhanes$sex)2 0.7446 1.3431 0.6552 0.8462
## factor(nhanes$race)2 1.1955 0.8364 1.0404 1.3738
## factor(nhanes$educ)2 1.0017 0.9984 0.8860 1.1324
## factor(nhanes$educ)3 0.8143 1.2281 0.6665 0.9948
## factor(nhanes$smk)2 1.1872 0.8423 1.0343 1.3628
## factor(nhanes$smk)3 1.8813 0.5315 1.6014 2.2102
## factor(nhanes$alc)1 0.8333 1.2001 0.7296 0.9516
##
## Concordance= 0.857 (se = 0.005 )
## Likelihood ratio test= 2351 on 10 df, p=<2e-16
## Wald test = 1629 on 10 df, p=<2e-16
## Score (logrank) test = 2328 on 10 df, p=<2e-16
### Test for the proportional hazards assumption
test.prop <- cox.zph(cox.bpb3.adj) # Do any of the variables violate the proportional hazards assumption? May consider stratifying by sex.
test.prop
## chisq df p
## bpb3 0.7663 2 0.682
## nhanes$age 1.3879 1 0.239
## factor(nhanes$sex) 4.4387 1 0.035
## factor(nhanes$race) 0.3733 1 0.541
## factor(nhanes$educ) 0.1618 2 0.922
## factor(nhanes$smk) 0.4093 2 0.815
## factor(nhanes$alc) 0.0235 1 0.878
## GLOBAL 8.0277 10 0.626
## Display a graph of the scaled Schoenfeld residuals, along with a smooth curve
plot(test.prop) # for all variables
plot(test.prop, var = 4) + # Can call up variables indivdually, here for sex
abline(h = 0, lty = 3, col = 2)
## integer(0)
# Stratify by sex
cox.bpb3.adj1 <- coxph(Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 + nhanes$age + strata(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) + factor(nhanes$smk) + factor(nhanes$alc))
summary(cox.bpb3.adj1)
## Call:
## coxph(formula = Surv(nhanes$pmon_mec, nhanes$d_total) ~ bpb3 +
## nhanes$age + strata(nhanes$sex) + factor(nhanes$race) + factor(nhanes$educ) +
## factor(nhanes$smk) + factor(nhanes$alc))
##
## n= 4905, number of events= 1211
## (169 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## bpb3[2.4, 4.4) 0.069557 1.072033 0.081702 0.851 0.39458
## bpb3[4.4,48.1] 0.088475 1.092506 0.083082 1.065 0.28692
## nhanes$age 0.088456 1.092486 0.002374 37.260 < 2e-16 ***
## factor(nhanes$race)2 0.178662 1.195616 0.070942 2.518 0.01179 *
## factor(nhanes$educ)2 0.001237 1.001238 0.062599 0.020 0.98424
## factor(nhanes$educ)3 -0.209755 0.810783 0.102180 -2.053 0.04009 *
## factor(nhanes$smk)2 0.166505 1.181170 0.070394 2.365 0.01801 *
## factor(nhanes$smk)3 0.631710 1.880825 0.082187 7.686 1.51e-14 ***
## factor(nhanes$alc)1 -0.179672 0.835545 0.067736 -2.653 0.00799 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## bpb3[2.4, 4.4) 1.0720 0.9328 0.9134 1.2582
## bpb3[4.4,48.1] 1.0925 0.9153 0.9283 1.2857
## nhanes$age 1.0925 0.9153 1.0874 1.0976
## factor(nhanes$race)2 1.1956 0.8364 1.0404 1.3740
## factor(nhanes$educ)2 1.0012 0.9988 0.8856 1.1319
## factor(nhanes$educ)3 0.8108 1.2334 0.6636 0.9906
## factor(nhanes$smk)2 1.1812 0.8466 1.0289 1.3559
## factor(nhanes$smk)3 1.8808 0.5317 1.6010 2.2096
## factor(nhanes$alc)1 0.8355 1.1968 0.7317 0.9542
##
## Concordance= 0.855 (se = 0.005 )
## Likelihood ratio test= 2315 on 9 df, p=<2e-16
## Wald test = 1599 on 9 df, p=<2e-16
## Score (logrank) test = 2294 on 9 df, p=<2e-16
cox.zph(cox.bpb3.adj1)
## chisq df p
## bpb3 0.443 2 0.80
## nhanes$age 1.321 1 0.25
## factor(nhanes$race) 0.404 1 0.53
## factor(nhanes$educ) 0.156 2 0.93
## factor(nhanes$smk) 1.038 2 0.60
## factor(nhanes$alc) 0.476 1 0.49
## GLOBAL 3.675 9 0.93
Optional: Exercise 7A
Not a graded assignment.
##Distribution of primary sampling unit (psu) and cluster (strata)
tab1(nhanes$psu) # 2 PSU
## nhanes$psu :
## Frequency Percent Cum. percent
## 1 2481 48.9 48.9
## 2 2593 51.1 100.0
## Total 5074 100.0 100.0
tab1(nhanes$strata) # 49 strata
## nhanes$strata :
## Frequency Percent Cum. percent
## 1 102 2.0 2.0
## 2 116 2.3 4.3
## 3 113 2.2 6.5
## 4 90 1.8 8.3
## 5 89 1.8 10.1
## 6 99 2.0 12.0
## 7 124 2.4 14.4
## 8 120 2.4 16.8
## 9 105 2.1 18.9
## 10 121 2.4 21.3
## 11 115 2.3 23.5
## 12 103 2.0 25.6
## 13 110 2.2 27.7
## 14 125 2.5 30.2
## 15 127 2.5 32.7
## 16 141 2.8 35.5
## 17 143 2.8 38.3
## 18 126 2.5 40.8
## 19 134 2.6 43.4
## 20 119 2.3 45.8
## 21 106 2.1 47.9
## 22 128 2.5 50.4
## 23 114 2.2 52.6
## 24 116 2.3 54.9
## 25 112 2.2 57.1
## 26 120 2.4 59.5
## 27 128 2.5 62.0
## 28 116 2.3 64.3
## 29 127 2.5 66.8
## 30 112 2.2 69.0
## 31 132 2.6 71.6
## 32 103 2.0 73.6
## 33 127 2.5 76.1
## 34 122 2.4 78.5
## 35 66 1.3 79.8
## 36 53 1.0 80.9
## 37 37 0.7 81.6
## 38 46 0.9 82.5
## 39 86 1.7 84.2
## 40 57 1.1 85.3
## 41 70 1.4 86.7
## 42 44 0.9 87.6
## 43 99 2.0 89.5
## 44 111 2.2 91.7
## 45 79 1.6 93.3
## 46 57 1.1 94.4
## 47 51 1.0 95.4
## 48 148 2.9 98.3
## 49 85 1.7 100.0
## Total 5074 100.0 100.0
nhanes$locode<-ifelse(nhanes$psu==1, nhanes$strata, nhanes$strata+49)
tab1(nhanes$locode)
## nhanes$locode :
## Frequency Percent Cum. percent
## 1 50 1.0 1.0
## 2 49 1.0 2.0
## 3 55 1.1 3.0
## 4 45 0.9 3.9
## 5 49 1.0 4.9
## 6 50 1.0 5.9
## 7 59 1.2 7.0
## 8 59 1.2 8.2
## 9 52 1.0 9.2
## 10 65 1.3 10.5
## 11 55 1.1 11.6
## 12 45 0.9 12.5
## 13 54 1.1 13.5
## 14 67 1.3 14.9
## 15 61 1.2 16.1
## 16 69 1.4 17.4
## 17 68 1.3 18.8
## 18 60 1.2 19.9
## 19 59 1.2 21.1
## 20 54 1.1 22.2
## 21 55 1.1 23.3
## 22 57 1.1 24.4
## 23 70 1.4 25.8
## 24 59 1.2 26.9
## 25 50 1.0 27.9
## 26 49 1.0 28.9
## 27 76 1.5 30.4
## 28 58 1.1 31.5
## 29 60 1.2 32.7
## 30 50 1.0 33.7
## 31 60 1.2 34.9
## 32 52 1.0 35.9
## 33 64 1.3 37.2
## 34 55 1.1 38.2
## 35 33 0.7 38.9
## 36 26 0.5 39.4
## 37 19 0.4 39.8
## 38 21 0.4 40.2
## 39 48 0.9 41.1
## 40 31 0.6 41.7
## 41 25 0.5 42.2
## 42 21 0.4 42.6
## 43 52 1.0 43.7
## 44 48 0.9 44.6
## 45 40 0.8 45.4
## 46 31 0.6 46.0
## 47 24 0.5 46.5
## 48 83 1.6 48.1
## 49 39 0.8 48.9
## 50 52 1.0 49.9
## 51 67 1.3 51.2
## 52 58 1.1 52.4
## 53 45 0.9 53.3
## 54 40 0.8 54.1
## 55 49 1.0 55.0
## 56 65 1.3 56.3
## 57 61 1.2 57.5
## 58 53 1.0 58.6
## 59 56 1.1 59.7
## 60 60 1.2 60.8
## 61 58 1.1 62.0
## 62 56 1.1 63.1
## 63 58 1.1 64.2
## 64 66 1.3 65.5
## 65 72 1.4 66.9
## 66 75 1.5 68.4
## 67 66 1.3 69.7
## 68 75 1.5 71.2
## 69 65 1.3 72.5
## 70 51 1.0 73.5
## 71 71 1.4 74.9
## 72 44 0.9 75.8
## 73 57 1.1 76.9
## 74 62 1.2 78.1
## 75 71 1.4 79.5
## 76 52 1.0 80.5
## 77 58 1.1 81.7
## 78 67 1.3 83.0
## 79 62 1.2 84.2
## 80 72 1.4 85.6
## 81 51 1.0 86.6
## 82 63 1.2 87.9
## 83 67 1.3 89.2
## 84 33 0.7 89.9
## 85 27 0.5 90.4
## 86 18 0.4 90.7
## 87 25 0.5 91.2
## 88 38 0.7 92.0
## 89 26 0.5 92.5
## 90 45 0.9 93.4
## 91 23 0.5 93.8
## 92 47 0.9 94.8
## 93 63 1.2 96.0
## 94 39 0.8 96.8
## 95 26 0.5 97.3
## 96 27 0.5 97.8
## 97 65 1.3 99.1
## 98 46 0.9 100.0
## Total 5074 100.0 100.0
table(nhanes$psu, nhanes$locode)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 1 50 49 55 45 49 50 59 59 52 65 55 45 54 67 61 69 68 60 59 54 55 57 70 59 50
## 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## 1 49 76 58 60 50 60 52 64 55 33 26 19 21 48 31 25 21 52 48 40 31 24 83 39 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 52
##
## 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 67 58 45 40 49 65 61 53 56 60 58 56 58 66 72 75 66 75 65 51 71 44 57 62 71
##
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 52 58 67 62 72 51 63 67 33 27 18 25 38 26 45 23 47 63 39 26 27 65 46
###Random intercepts only model for SBP
sbp.lme <- lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1|locode, na.action=na.omit, data=nhanes)
summary(sbp.lme)
## Linear mixed-effects model fit by REML
## Data: nhanes
## AIC BIC logLik
## -7179.826 -7088.897 3603.913
##
## Random effects:
## Formula: ~1 | locode
## (Intercept) Residual
## StdDev: 0.01497941 0.1138104
##
## Fixed effects: log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc)
## Value Std.Error DF t-value p-value
## (Intercept) 4.504171 0.014086651 4793 319.7474 0.0000
## log(bpb) 0.012362 0.002685955 4793 4.6025 0.0000
## age 0.003033 0.000537513 4793 5.6418 0.0000
## I(age^2) 0.000016 0.000005123 4793 3.1197 0.0018
## bmi 0.004815 0.000294457 4793 16.3508 0.0000
## factor(race)2 0.013063 0.004054943 4793 3.2216 0.0013
## factor(sex)2 -0.032211 0.003780680 4793 -8.5200 0.0000
## factor(smk)2 -0.014085 0.004267112 4793 -3.3009 0.0010
## factor(smk)3 -0.000873 0.004340996 4793 -0.2012 0.8406
## factor(educ)2 0.004741 0.003721299 4793 1.2740 0.2027
## factor(educ)3 -0.014662 0.005529777 4793 -2.6515 0.0080
## factor(alc)1 0.005850 0.003696056 4793 1.5827 0.1136
## Correlation:
## (Intr) lg(bp) age I(g^2) bmi fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb) -0.124
## age -0.693 -0.107
## I(age^2) 0.647 0.052 -0.983
## bmi -0.400 0.034 -0.221 0.222
## factor(race)2 -0.041 -0.112 -0.007 0.037 -0.074
## factor(sex)2 -0.183 0.338 -0.059 0.050 -0.028 -0.029
## factor(smk)2 0.051 -0.011 -0.154 0.124 -0.024 0.054 0.194
## factor(smk)3 -0.044 -0.184 -0.115 0.136 0.105 -0.031 0.056 0.345
## factor(educ)2 -0.180 0.137 -0.025 0.045 0.049 -0.053 -0.034 -0.006
## factor(educ)3 -0.109 0.167 -0.107 0.110 0.088 0.027 0.063 0.051
## factor(alc)1 -0.188 -0.087 0.016 0.029 0.052 0.087 0.222 -0.070
## fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)
## age
## I(age^2)
## bmi
## factor(race)2
## factor(sex)2
## factor(smk)2
## factor(smk)3
## factor(educ)2 0.039
## factor(educ)3 0.131 0.415
## factor(alc)1 -0.149 -0.111 -0.131
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.63615371 -0.64264248 -0.05180877 0.57432592 5.10209571
##
## Number of Observations: 4902
## Number of Groups: 98
##Return variance-covariance matrix for random variables
getVarCov(sbp.lme)
## Random effects variance covariance matrix
## (Intercept)
## (Intercept) 0.00022438
## Standard Deviations: 0.014979
##Random Intercepts and slopes model
sbp.lme2<-lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1+log(bpb)|locode, na.action=na.omit, data=nhanes)
summary(sbp.lme2)
## Linear mixed-effects model fit by REML
## Data: nhanes
## AIC BIC logLik
## -7175.826 -7071.907 3603.913
##
## Random effects:
## Formula: ~1 + log(bpb) | locode
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 1.497941e-02 (Intr)
## log(bpb) 1.048939e-05 -0.001
## Residual 1.138104e-01
##
## Fixed effects: log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc)
## Value Std.Error DF t-value p-value
## (Intercept) 4.504171 0.014086651 4793 319.7474 0.0000
## log(bpb) 0.012362 0.002685955 4793 4.6025 0.0000
## age 0.003033 0.000537513 4793 5.6418 0.0000
## I(age^2) 0.000016 0.000005123 4793 3.1197 0.0018
## bmi 0.004815 0.000294457 4793 16.3508 0.0000
## factor(race)2 0.013063 0.004054943 4793 3.2216 0.0013
## factor(sex)2 -0.032211 0.003780680 4793 -8.5200 0.0000
## factor(smk)2 -0.014085 0.004267112 4793 -3.3009 0.0010
## factor(smk)3 -0.000873 0.004340996 4793 -0.2012 0.8406
## factor(educ)2 0.004741 0.003721299 4793 1.2740 0.2027
## factor(educ)3 -0.014662 0.005529777 4793 -2.6515 0.0080
## factor(alc)1 0.005850 0.003696056 4793 1.5827 0.1136
## Correlation:
## (Intr) lg(bp) age I(g^2) bmi fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb) -0.124
## age -0.693 -0.107
## I(age^2) 0.647 0.052 -0.983
## bmi -0.400 0.034 -0.221 0.222
## factor(race)2 -0.041 -0.112 -0.007 0.037 -0.074
## factor(sex)2 -0.183 0.338 -0.059 0.050 -0.028 -0.029
## factor(smk)2 0.051 -0.011 -0.154 0.124 -0.024 0.054 0.194
## factor(smk)3 -0.044 -0.184 -0.115 0.136 0.105 -0.031 0.056 0.345
## factor(educ)2 -0.180 0.137 -0.025 0.045 0.049 -0.053 -0.034 -0.006
## factor(educ)3 -0.109 0.167 -0.107 0.110 0.088 0.027 0.063 0.051
## factor(alc)1 -0.188 -0.087 0.016 0.029 0.052 0.087 0.222 -0.070
## fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)
## age
## I(age^2)
## bmi
## factor(race)2
## factor(sex)2
## factor(smk)2
## factor(smk)3
## factor(educ)2 0.039
## factor(educ)3 0.131 0.415
## factor(alc)1 -0.149 -0.111 -0.131
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.63615371 -0.64264247 -0.05180877 0.57432589 5.10209563
##
## Number of Observations: 4902
## Number of Groups: 98
getVarCov(sbp.lme2)
## Random effects variance covariance matrix
## (Intercept) log(bpb)
## (Intercept) 2.2438e-04 -8.3127e-11
## log(bpb) -8.3127e-11 1.1003e-10
## Standard Deviations: 0.014979 1.0489e-05
## compare these two models
anova(sbp.lme, sbp.lme2)
## Model df AIC BIC logLik Test L.Ratio p-value
## sbp.lme 1 14 -7179.826 -7088.897 3603.913
## sbp.lme2 2 16 -7175.826 -7071.907 3603.913 1 vs 2 2.225843e-06 1
#### first specify the design effect
bpdsn<-svydesign(id=~psu, strata=~strata, weights=~wt_mh, data=nhanes, nest=T)
bpdsn
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (98) clusters.
## svydesign(id = ~psu, strata = ~strata, weights = ~wt_mh, data = nhanes,
## nest = T)
#nest=T: relabel cluster ids to enforce nesting within strata
#### Descriptive analyses
svymean(~ nhanes$age, design=bpdsn)
## mean SE
## nhanes$age 45.041 0.5403
summ(nhanes$age)
## obs. mean median s.d. min. max.
## 5074 48.744 46 19.297 20 90
svymean(~ nhanes$age + nhanes$sbp + nhanes$bpb, design=bpdsn)
## mean SE
## nhanes$age 45.041 0.5403
## nhanes$sbp 122.656 0.4443
## nhanes$bpb 3.463 0.0958
svymean(~ nhanes$sbp + nhanes$bpb + nhanes$bmi, design=bpdsn)
## mean SE
## nhanes$sbp 122.656 Inf
## nhanes$bpb 3.463 Inf
## nhanes$bmi NaN NaN
svymean(~ nhanes$sbp + nhanes$bpb + nhanes$bmi, design=bpdsn, na.rm=T)
## mean SE
## nhanes$sbp 122.6401 0.4437
## nhanes$bpb 3.4614 0.0957
## nhanes$bmi 26.5947 0.1322
svytable(~ nhanes$sex, design=bpdsn)
## nhanes$sex
## 1 2
## 25097913 27754405
svytable(~ nhanes$sex, Ntotal=100, design=bpdsn)
## nhanes$sex
## 1 2
## 47.48687 52.51313
svytable(~ nhanes$sex + nhanes$race, design=bpdsn)
## nhanes$race
## nhanes$sex 1 2
## 1 22552719 2545194
## 2 24334379 3420026
svytable(~ nhanes$sex + nhanes$race, Ntotal=100, design=bpdsn)
## nhanes$race
## nhanes$sex 1 2
## 1 42.671202 4.815671
## 2 46.042217 6.470911
#svychisq(~ nhanes$sex + nhanes$race, design=bpdsn)
chisq.test(nhanes$sex, nhanes$race)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: nhanes$sex and nhanes$race
## X-squared = 5.6875, df = 1, p-value = 0.01709
svytable(~ nhanes$htn + nhanes$smk, design=bpdsn)
## nhanes$smk
## nhanes$htn 1 2 3
## 0 17445187 8024355 11037244
## 1 7361376 5478611 3505545
#svychisq(~ nhanes$htn + nhanes$smk, design=bpdsn, statistic="Wald")
## Survey Chisq used in SUDAAN
## Univariate distributions and bivariate associations using graphical analyses
svyhist(~ nhanes$sbp, design=bpdsn)
svyhist(~ log(nhanes$sbp), design=bpdsn)
svyhist(~ nhanes$bpb, bpdsn)
svyhist(~ log(nhanes$bpb), bpdsn)
svyboxplot(log(nhanes$sbp) ~ 1, bpdsn)
# svyboxplot(log(nhanes$sbp) ~ as.factor(nhanes$smk), bpdsn)
# bp.bysexrace <- svyby(~ nhanes$sbp + nhanes$dbp, ~ nhanes$sex + nhanes$race, bpdsn, svymean)
# barplot(bp.bysexrace)
# barplot(bp.bysexrace, legend=TRUE)
## change the label of x-axis and ylim
# xlabel<-c("Male, White","Female, White","Male, Black","Female, Black")
# barplot(bp.bysexrace, legend=TRUE, ylim=c(0,160),col=c("purple","violet"), names=xlabel)
#simple scatterplot with circles whose area is proportional to the sampling weight
svyplot(log(nhanes$sbp) ~ nhanes$age, bpdsn, style="bubble")
#style="transparent" plots points with opacity proportional to sampling weight
svyplot(log(nhanes$sbp)~log(nhanes$bpb), bpdsn, style="transparent", pch=19, xlab="Blood lead", ylab="log(SBP)")
## Scatterplot smoothing
smth.sbp.bpb1<-svysmooth(log(nhanes$sbp) ~ nhanes$bpb, bpdsn, bandwidth=10)
#fit local polynomial kernel smoothing with a bandwidth=10 bpb unit
plot(smth.sbp.bpb1)
smth.sbp.bpb2<-svysmooth(log(nhanes$sbp) ~nhanes$bpb, bpdsn, bandwidth=20)
plot(smth.sbp.bpb2)
#### Linear and mixed effect models
## First fit a linear regression
sbp.lm<-lm(log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), data=nhanes)
summary(sbp.lm)
##
## Call:
## lm(formula = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) +
## factor(sex) + factor(smk) + factor(educ) + factor(alc), data = nhanes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53077 -0.07335 -0.00539 0.06647 0.58491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.504e+00 1.399e-02 321.836 < 2e-16 ***
## log(bpb) 1.127e-02 2.635e-03 4.275 1.95e-05 ***
## age 3.169e-03 5.381e-04 5.889 4.14e-09 ***
## I(age^2) 1.444e-05 5.126e-06 2.817 0.004873 **
## bmi 4.857e-03 2.952e-04 16.453 < 2e-16 ***
## factor(race)2 1.484e-02 3.786e-03 3.920 8.97e-05 ***
## factor(sex)2 -3.295e-02 3.777e-03 -8.722 < 2e-16 ***
## factor(smk)2 -1.427e-02 4.280e-03 -3.333 0.000864 ***
## factor(smk)3 -7.149e-04 4.331e-03 -0.165 0.868907
## factor(educ)2 2.335e-03 3.673e-03 0.636 0.525031
## factor(educ)3 -1.718e-02 5.452e-03 -3.151 0.001636 **
## factor(alc)1 4.695e-03 3.687e-03 1.273 0.202981
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1148 on 4890 degrees of freedom
## (172 observations deleted due to missingness)
## Multiple R-squared: 0.4166, Adjusted R-squared: 0.4153
## F-statistic: 317.5 on 11 and 4890 DF, p-value: < 2.2e-16
sbp.lme<-lme(fixed = log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc), random=~1|strata,
na.action=na.omit, data=nhanes)
summary(sbp.lme)
## Linear mixed-effects model fit by REML
## Data: nhanes
## AIC BIC logLik
## -7180.906 -7089.976 3604.453
##
## Random effects:
## Formula: ~1 | strata
## (Intercept) Residual
## StdDev: 0.01329355 0.1140193
##
## Fixed effects: log(sbp) ~ log(bpb) + age + I(age^2) + bmi + factor(race) + factor(sex) + factor(smk) + factor(educ) + factor(alc)
## Value Std.Error DF t-value p-value
## (Intercept) 4.504420 0.014118879 4842 319.0352 0.0000
## log(bpb) 0.012464 0.002678746 4842 4.6528 0.0000
## age 0.002986 0.000537173 4842 5.5596 0.0000
## I(age^2) 0.000016 0.000005122 4842 3.2103 0.0013
## bmi 0.004846 0.000294231 4842 16.4707 0.0000
## factor(race)2 0.013309 0.004087288 4842 3.2562 0.0011
## factor(sex)2 -0.032535 0.003777089 4842 -8.6137 0.0000
## factor(smk)2 -0.014623 0.004267145 4842 -3.4269 0.0006
## factor(smk)3 -0.001351 0.004343337 4842 -0.3110 0.7558
## factor(educ)2 0.005259 0.003728817 4842 1.4102 0.1585
## factor(educ)3 -0.014102 0.005533291 4842 -2.5487 0.0108
## factor(alc)1 0.005399 0.003691217 4842 1.4627 0.1436
## Correlation:
## (Intr) lg(bp) age I(g^2) bmi fctr(r)2 fctr(sx)2 fctr(sm)2
## log(bpb) -0.127
## age -0.690 -0.106
## I(age^2) 0.645 0.051 -0.983
## bmi -0.398 0.036 -0.222 0.222
## factor(race)2 -0.042 -0.108 -0.009 0.040 -0.074
## factor(sex)2 -0.183 0.340 -0.059 0.050 -0.027 -0.028
## factor(smk)2 0.051 -0.015 -0.154 0.123 -0.023 0.057 0.192
## factor(smk)3 -0.044 -0.185 -0.115 0.135 0.105 -0.027 0.054 0.344
## factor(educ)2 -0.179 0.141 -0.026 0.047 0.047 -0.054 -0.034 -0.005
## factor(educ)3 -0.107 0.172 -0.109 0.112 0.086 0.026 0.062 0.051
## factor(alc)1 -0.188 -0.085 0.015 0.030 0.053 0.085 0.222 -0.070
## fctr(s)3 fctr(d)2 fctr(d)3
## log(bpb)
## age
## I(age^2)
## bmi
## factor(race)2
## factor(sex)2
## factor(smk)2
## factor(smk)3
## factor(educ)2 0.040
## factor(educ)3 0.131 0.415
## factor(alc)1 -0.150 -0.111 -0.134
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.66735508 -0.64180845 -0.05163683 0.58517448 5.12186931
##
## Number of Observations: 4902
## Number of Groups: 49
## Let's use the survey package
# sbp.svy<-svyglm(log(nhanes$sbp) ~ log(nhanes$bpb) + nhanes$age + I(nhanes$age^2) + nhanes$bmi + factor(nhanes$race) + factor(nhanes$sex) + factor(nhanes$smk) + factor(nhanes$educ) + factor(nhanes$alc), bpdsn)
# summary(sbp.svy)
# plot(sbp.svy)
# par(mfrow=c(2,2))
# plot(sbp.lm, which=1)
# plot(sbp.lm, which=2)
# plot(sbp.svy, which=1)
# plot(sbp.svy, which=2)
# par(mfrow=c(1,1))
## compare the beta's
# summary(sbp.lm)$coef[2,]
# summary(sbp.lme)$tTable[2,]
# summary(sbp.svy)$coef[2,]
#### Logistic regression model
# htn.svy<-svyglm(nhanes$htn ~ log(nhanes$bpb) + ns(nhanes$age,df=5) + ns(nhanes$bmi,df=5) + factor(nhanes$race) + factor(nhanes$sex) + factor(nhanes$educ) + nhanes$hematoc + nhanes$chol + nhanes$packyrs + nhanes$diag_dm, family=quasibinomial(), bpdsn)
# summary(htn.svy)
##Suppose you have the estimates from 16 cities of the effect of PM2.5
##on Myocardial Infarction (MI) hospital admissions.
##We want to obtain the combined effect across all the cities.
city<-1:16
coef<-c(0.00155,0.00445,-0.00597,0.00237,0.00031,-0.00035,0.00206,0.00127,
-0.00395,-0.00434,0.00241,0.00730,-0.00175,0.00117,0.00007,0.00350)
se<-c(0.013589379,0.003224905,0.002400626,0.001797566,0.001019834,0.002658267,
0.001529252,0.002388748,0.002583907,0.005885021,0.00122501,0.00597465,
0.002546777,0.002406304,0.000913846,0.00517299)
model<-meta.summaries(coef, se, names=city, method="random")
#method="random" estimates and adds a heterogeneity variance
summary(model)
## Random-effects meta-analysis
## Call: meta.summaries(d = coef, se = se, method = "random", names = city)
## ----------------------------------------------------
## Effect (lower 95% upper) weights
## 1 0.00 -0.03 0.03 0.0
## 2 0.00 0.00 0.01 0.5
## 3 -0.01 -0.01 0.00 0.8
## 4 0.00 0.00 0.01 1.3
## 5 0.00 0.00 0.00 2.5
## 6 0.00 -0.01 0.00 0.7
## 7 0.00 0.00 0.01 1.6
## 8 0.00 0.00 0.01 0.8
## 9 0.00 -0.01 0.00 0.7
## 10 0.00 -0.02 0.01 0.2
## 11 0.00 0.00 0.00 2.1
## 12 0.01 0.00 0.02 0.2
## 13 0.00 -0.01 0.00 0.7
## 14 0.00 0.00 0.01 0.8
## 15 0.00 0.00 0.00 2.8
## 16 0.00 -0.01 0.01 0.2
## ----------------------------------------------------
## Summary effect: 0 95% CI ( 0,0 )
## Estimated heterogeneity variance: 1.2e-06 p= 0.176
##Plot the summary results
metaplot(coef, se)
##Get combined estimate, its standard error and RR and 95% CI
model$summary
## [1] 0.0005330941
model$summ
## [1] 0.0005330941
model$se.summary
## [1] 0.0005977285
model$se
## [1] 0.0005977285
exp(model$summ*10)
## [1] 1.005345
exp((model$summ-1.96*model$se)*10)
## [1] 0.9936358
exp((model$summ+1.96*model$se)*10)
## [1] 1.017193